# -*- coding: utf-8 -*-

import logging, json
from gensim import corpora, models, similarities
import numpy as np
from src.utils import cut

def run(model_path, json_path, test_texts, stopword_path):
	"""For each text in *test_texts*, log the most similar corpus document.

	Loads a previously trained gensim dictionary/TF-IDF model/corpus from
	``model_path`` (expects files ``plan.dict``, ``plan.model``,
	``plan.corpus``; note ``model_path`` is used as a raw string prefix, so
	it should end with a path separator), scores each target text against
	the corpus with cosine similarity, and logs the filename of the best
	match taken from the metadata list in ``json_path``.

	:param model_path: prefix for the persisted gensim artifacts
	:param json_path: JSON file with one metadata dict per corpus document,
		in corpus order; each dict may carry a 'filename' key
	:param test_texts: iterable of raw query strings
	:param stopword_path: text file with one stopword per line
	:returns: None (results are emitted via ``logging``)
	"""
	with open(stopword_path, encoding='utf-8') as f:
		stopwords = [line.strip() for line in f]

	with open(json_path, encoding='utf-8') as f:
		contents = json.load(f)

	# The trained artifacts and the similarity index are loop-invariant:
	# load/build them once instead of re-reading from disk per target text.
	dictionary = corpora.Dictionary.load(model_path + 'plan.dict')
	tfidf_model = models.TfidfModel.load(model_path + 'plan.model')
	corpus_vector = corpora.MmCorpus(model_path + 'plan.corpus')

	tfidf_corpus = tfidf_model[corpus_vector]
	index = similarities.MatrixSimilarity(tfidf_corpus)

	for target in test_texts:
		# Tokenize (stopwords removed by the project's `cut` helper),
		# vectorize, and weight the query the same way as the corpus.
		target_vector = dictionary.doc2bow(cut(target, stopwords))
		tfidf_target = tfidf_model[target_vector]
		sim = index[tfidf_target]

		# Lazy %-style args: formatting only happens if INFO is enabled.
		logging.info('Test text:\n%s\n', target)
		logging.info('Similarity result:\n%s\n', sim)

		# argmax returns the first index of the maximum, matching the old
		# np.where(sim == sim.max())[0][0] lookup.
		best = int(np.argmax(sim))
		logging.info('Maximum similarity: [%s %s]',
		             contents[best].get('filename'), sim[best])
